---
title: "Model Perplexity"
header-includes:
  - \usepackage{multirow}
author: "Jeff Lewis"
date: "7/10/2021"
output: 
  pdf_document:
    keep_tex: yes
---

```{r setup, include=FALSE}
# NOTE(review): rm(list = ls()) clears every object in the current
# environment. It does not reset loaded packages or options, so it is not a
# substitute for a fresh R session -- consider dropping it.
rm(list=ls())
# Show chunk source code in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
# tidyverse supplies the dplyr/purrr/stringr/readr verbs used below;
# tictoc supplies the tic()/toc() timers wrapped around each chunk.
library(tidyverse)
library(tictoc)
# Project-local helpers. em_mix_irt() and cv_perplexity() are not defined in
# this document, so they presumably come from this file -- confirm.
source("../R/mixture_irt_md.R")
```

## Baseline independent voting model

Load the 1-D results, which determine the items used to construct each survey's dataset for the null-ish model.

```{r load_data}
# Time the loading step.
tic()
# load() restores objects under the names they were saved with.
# NOTE(review): `appended_data`, used by every later chunk, is never assigned
# in this document, so it presumably comes from one of these files -- confirm.
load("../fits/one_dim_fit/bigsurveys_recoded_plus_estimates_1d.RData")
load("../fits/one_dim_fit/all_results_1d.RData")
# Keep the 1-D fits under a dimension-specific name and drop the generic
# `all_res`, because later chunks load other files and read `all_res` again.
res_1d <- all_res
rm(all_res)
toc()
```

Fit the null model to each survey:
```{r fit_null_model, cache=TRUE}
tic()
# Fit the null-ish model once per survey, using the same item columns that
# the 1-D fit for that survey reports in its item summary.
res_null <- map(names(res_1d), function(src) {
  # Progress marker: one line per survey.
  cat(src, "\n", sep = "")
  # Rows belonging to this survey, with any grouping removed.
  survey_rows <- ungroup(filter(appended_data, source == src))
  # Response matrix restricted to this survey's items.
  resp_mat <- as.matrix(
    select(survey_rows, res_1d[[src]]$item_summary$item)
  )
  em_mix_irt(resp_mat, iters = 1, irt_iters = 1, w_alpha = c(0, 1, 0))
})
# map() over a character vector returns an unnamed list; label it by survey.
names(res_null) <- names(res_1d)
toc()
```

## Null perplexity

```{r null_model_perplexity, cache=TRUE}
tic()
# Cross-validated perplexity of the null fit, one survey at a time.
perp_null <- map_df(names(res_null), function(nm) {
  # Progress marker: one line per survey.
  cat(nm, "\n", sep = "")
  # Rows belonging to this survey, with any grouping removed.
  survey_rows <- ungroup(filter(appended_data, source == nm))
  # Response matrix restricted to the items used by the 1-D fit.
  resp_mat <- as.matrix(
    select(survey_rows, res_1d[[nm]]$item_summary$item)
  )
  cv_perplexity(resp_mat,
                fit = res_null[[nm]],
                xsamp = 1000, iters = 1, irt_iters = 1)
})
# Label the stacked rows by survey; each = 2 encodes the expectation that
# cv_perplexity() contributes two rows per survey.
perp_null <- mutate(perp_null, survey = rep(names(res_null), each = 2))
toc()
```


## One-dimensional model with mixing

```{r 1d_model_perplexity, cache=TRUE}
tic()
# Cross-validated perplexity of the 1-D fits, one survey at a time.
perp_1d <- map_df(names(res_1d), function(nm) {
  # Progress marker: one line per survey.
  cat(nm, "\n", sep = "")
  # Rows belonging to this survey, with any grouping removed.
  survey_rows <- ungroup(filter(appended_data, source == nm))
  # Response matrix restricted to the items used by the 1-D fit.
  resp_mat <- as.matrix(
    select(survey_rows, res_1d[[nm]]$item_summary$item)
  )
  cv_perplexity(resp_mat, fit = res_1d[[nm]], xsamp = 1000)
})
# Label the stacked rows by survey; each = 2 encodes the expectation that
# cv_perplexity() contributes two rows per survey.
perp_1d <- mutate(perp_1d, survey = rep(names(res_1d), each = 2))
toc()
```

```{r 2d_model_perplexity, cache=TRUE}
tic()
# Bring in the 2-D fits. The file is expected to define `all_res`, which is
# renamed and then removed so the generic name does not linger.
load("../fits/two_dim_fit/all_results_2d.RData")
res_2d <- all_res
rm(all_res)
# Cross-validated perplexity of the 2-D fits, one survey at a time.
perp_2d <- map_df(names(res_2d), function(nm) {
  # Progress marker: one line per survey.
  cat(nm, "\n", sep = "")
  # Rows belonging to this survey, with any grouping removed.
  survey_rows <- ungroup(filter(appended_data, source == nm))
  # Response matrix restricted to the items used by the 2-D fit.
  resp_mat <- as.matrix(
    select(survey_rows, res_2d[[nm]]$item_summary$item)
  )
  cv_perplexity(resp_mat, fit = res_2d[[nm]])
})
# Label the stacked rows by survey; each = 2 encodes the expectation that
# cv_perplexity() contributes two rows per survey.
perp_2d <- mutate(perp_2d, survey = rep(names(res_2d), each = 2))
toc()
```

```{r 2d_no_mixing_perplexity, cache=TRUE}
tic()
# Bring in the 2-D no-mixing fits. The file is expected to define `all_res`,
# which is renamed and then removed so the generic name does not linger.
load("../fits/no_mix_fit_2d/all_results_2d_no_mix.RData")
res_2d_nm <- all_res
rm(all_res)

# Cross-validated perplexity of the 2-D no-mixing fits, one survey at a time.
perp_2d_nm <- map_df(names(res_2d_nm),
    function(nm) {
      # Progress marker: one line per survey.
      cat(paste0(nm,"\n"))
      cv_perplexity(appended_data %>%
                   filter(source==nm) %>%
                   ungroup() %>%
                   select(res_2d_nm[[nm]]$item_summary$item) %>%
                   as.matrix(),
                 fit=res_2d_nm[[nm]])
    }) %>%
    # Label rows with the names of the list that was actually iterated over.
    # The labels previously came from `res_2d`, which mislabels rows whenever
    # the two fit lists differ in order or content and made this cached chunk
    # depend on an object created in a different chunk.
    mutate(survey = rep(names(res_2d_nm), each=2))
toc()
```

```{r make_table, cache=TRUE }
# Combine the four perplexity tables into one wide table keyed by survey and
# estimate type.
join_keys <- c("survey", "estimate")

# Step 1: null vs 1-D. Shared non-key columns get `_null` / `_1d` suffixes.
null_and_1d <- left_join(perp_null, perp_1d,
                         by = join_keys, suffix = c("_null", "_1d"))

# Step 2: add the 2-D no-mixing results. No suffix is given here: the columns
# on the left are already suffixed, so the incoming names should not clash
# and stay bare until the next join.
plus_2d_nm <- left_join(null_and_1d, perp_2d_nm, by = join_keys)

# Step 3: add the 2-D mixing results. The bare columns from step 2 now clash
# and become `_2d_nm`; the incoming ones become `_2d`.
all_models <- left_join(plus_2d_nm, perp_2d,
                        by = join_keys, suffix = c("_2d_nm", "_2d"))

# Keep the columns needed for the table and shorten the survey label to its
# four-digit year.
res <- select(all_models,
              survey, avg_n_null,
              starts_with("loglik"), starts_with("perp"))
res <- mutate(res, survey = str_extract(survey, "\\d{4}"))
res
```

```{r}
library(kableExtra)

# Build the LaTeX comparison table: one row per survey, four log-likelihood
# columns and four perplexity columns (null, 1-D mixing, 2-D no mixing,
# 2-D mixing).
the_tbl <- res %>%
  group_by(survey) %>%
  # Each survey has two rows in `res`. Log-likelihoods are taken from the
  # second row and perplexities from the first.
  # NOTE(review): which estimate each row holds is decided by
  # cv_perplexity()'s row order -- confirm.
  summarize(avg_n_null = avg_n_null[1],
            across(starts_with("loglik"), ~ .x[2]),
            across(starts_with("perp"), ~ .x[1])) %>%
  # One digits/align entry per column: survey, average item count,
  # 4 log-likelihoods, 4 perplexities.
  kbl( digits = c(0, 1, rep(0,4), rep(2,4)),
       format="latex",
       booktabs=TRUE,
       col.names = c("Survey", "Avg. no. of items",
                     rep(c("Null", "1-D Mix.", "No mix.", "Mix."), 2)),
       align = c('c','c', rep('r', 4), rep('c', 4)),
       # Spelled out as FALSE: `F` is an ordinary variable that can be
       # reassigned, FALSE is a reserved word.
       escape = FALSE) %>%
    # Header rows are added bottom-up: first the "2-D" grouping over the last
    # two columns of each block, then the block titles above it.
    add_header_above(c(" " = 2, " "=2, "2-D"=2, " "=2, "2-D"=2)) %>%
    add_header_above(c(" "=2, "Log likelihood"=4, "Perplexity"=4)) %>%
    column_spec(2, width = "15mm") %>%
    kable_styling()

# Write the table to a standalone .tex file. The regex removes the
# \begin{table} / \end{table} wrapper lines from the generated LaTeX, so the
# file holds the table body without its own table environment.
write_file(as.character(the_tbl) %>%
             str_replace_all("\\\\[begind]+\\{table\\}\\s*(\\n|$)", ""),
           "all_survey_perplexity.tex" )

the_tbl
```